In this experiment we sorted MPPs from G6PD-Tg mice and from WT littermate controls (send from IMDEA madrid) and barcoded them with the LG2.2 library. Cells were transplanted into irradiated recipients and 3 weeks later the bone marrow was harvested, barcoded cells were sorted and lysed for barcode library sequencing.
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_knit$set(root.dir = "/Users/jasoncosgrove/Dropbox (Team_Perie)/Jason/Experiments/Wet_lab/JCW24_G6PD_barcoding/LentiviralBarcoding_Analysis/inputs/")
#clear the workspace and load in the necessary packages
rm(list = ls())
library(cowplot)
library(plotrix)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
source('/Users/jasoncosgrove/Dropbox (Team_Perie)/Jason/Experiments/Wet_lab/JCW24_G6PD_barcoding/LentiviralBarcoding_Analysis/helper_methods_mature.R')
setwd("/Users/jasoncosgrove/Dropbox (Team_Perie)/Jason/Experiments/Wet_lab/JCW24_G6PD_barcoding/LentiviralBarcoding_Analysis/inputs/")
# Set output directory
output_fp <- "/Users/jasoncosgrove/Desktop/"
The data has been demultiplexed, QCd and filtered using xcalibr, and R scripts. Full pipeline available in the LentiviralBarcoding_Demultiplexing and LentiviralBarcoding_QC folders
#read in the QC'd and filtered data
wt.mat <- read.csv("JCW24WT_ANALYSIS_matx_norm_filt_cor_ab_trueFalseRU.csv")
tg.mat <- read.csv("JCW24TG_ANALYSIS_matx_norm_filt_cor_ab_trueFalseRU.csv")
Population sizes vary dramatically between different cell types in the hematopoietic system and so a normalisation should be made to the data prior to making biological inferences. We perform a column-wise normalisation such that each column sums to 1. The value in each i,j position then represents the proportional abundance of barcode i in sample j.
# normalise the count matrices
wt.barcodes <- wt.mat$tag
tg.barcodes <- tg.mat$tag
wt.mat.norm <- columnNormalisation(wt.mat[,-1],wt.barcodes)
tg.mat.norm <- columnNormalisation(tg.mat[,-1],tg.barcodes)
#remove clones that dont appear in any sample and remove the mpps from the analysis
wt.mat.norm.filt <- removeUninformativeClones(wt.mat.norm[,!grepl("CKIT",colnames(wt.mat.norm))])
tg.mat.norm.filt <- removeUninformativeClones(tg.mat.norm[,!grepl("CKIT",colnames(tg.mat.norm))])
Split the matrix so that we can analyse each mouse independently
wt.m1<- getMouseMatrix(wt.mat.norm.filt,"M1.WT")
wt.m2<- getMouseMatrix(wt.mat.norm.filt,"M2.WT")
wt.m3<- getMouseMatrix(wt.mat.norm.filt,"M3.WT")
wt.m4<- getMouseMatrix(wt.mat.norm.filt,"M4.WT")
tg.m1<- getMouseMatrix(tg.mat.norm.filt,"M1.TG")
tg.m2<- getMouseMatrix(tg.mat.norm.filt,"M2.TG")
tg.m3<- getMouseMatrix(tg.mat.norm.filt,"M3.TG")
tg.m4<- getMouseMatrix(tg.mat.norm.filt,"M4.TG")
The count matrix actually represents the number of sequencing reads measured for each barcode. But it is not the case that 1 read equals 1 cell because it depends on how many cells you had in the beginning, how much of your sample was sorted, and what is your sequencing depth. We record how many cells we sort and how much volume of our total sample we sort so we can use this information to convert reads to cell numbers.
# now normalise the read counts by cell counts
m1.wt.cellnorm <- cellcountNormalisation("M1-WT", wt.m1)
m2.wt.cellnorm <- cellcountNormalisation("M2-WT", wt.m2)
m3.wt.cellnorm <- cellcountNormalisation("M3-WT", wt.m3)
m4.wt.cellnorm <- cellcountNormalisation("M4-WT", wt.m4)
m1.tg.cellnorm <- cellcountNormalisation("M1-Tg", tg.m1)
m2.tg.cellnorm <- cellcountNormalisation("M2-Tg", tg.m2)
m3.tg.cellnorm <- cellcountNormalisation("M3-Tg", tg.m3)
m4.tg.cellnorm <- cellcountNormalisation("M4-Tg", tg.m4)
Here we combine mice by each sample to create a metasample for each condition. This allows us to look globally at what the difference between the two conditions is
# merge the different mice into a single sample
wt.meta <- makeMetaMouse(wt.m1, wt.m2, wt.m3, wt.m4)
tg.meta <- makeMetaMouse(tg.m1, tg.m2, tg.m3, tg.m4)
# merge the different mice into a single sample
wt.meta.cellnorm <- makeMetaMouse(m1.wt.cellnorm,
m2.wt.cellnorm,
m3.wt.cellnorm,
m4.wt.cellnorm)
tg.meta.cellnorm <- makeMetaMouse(m1.tg.cellnorm,
m2.tg.cellnorm,
m3.tg.cellnorm,
m4.tg.cellnorm)
When analysing lineage tracing data one key metric is the clone size distributions. Here we analyse clone size distributions for the M, E and B lineages in the WT and Tg mice. We observe significant differences in clone size distributions for E and B when we group all mice together
boxplot(wt.meta$M,wt.meta$E, wt.meta$B,
tg.meta$M,tg.meta$E, tg.meta$B ,
outline = F,las = 2,ylab = "relative barcode abundance per lineage", main = "Clone Size Distributions Per Condition per Lineage",
at =c(1,2,3, 5,6,7),col = c("red","red","red","blue","blue","blue"),
names = c("WT - M", "WT - E","WT - B",
"Tg - M", "Tg - E","Tg - B"))
wilcox.test(wt.meta$M,tg.meta$M)
##
## Wilcoxon rank sum test with continuity correction
##
## data: wt.meta$M and tg.meta$M
## W = 234804, p-value = 0.5358
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(wt.meta$B,tg.meta$B)
##
## Wilcoxon rank sum test with continuity correction
##
## data: wt.meta$B and tg.meta$B
## W = 222583, p-value = 0.0233
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(wt.meta$E,tg.meta$E)
##
## Wilcoxon rank sum test with continuity correction
##
## data: wt.meta$E and tg.meta$E
## W = 223406, p-value = 0.03011
## alternative hypothesis: true location shift is not equal to 0
#clone size distributions normalised by cell count
boxplot(wt.meta.cellnorm$M,wt.meta.cellnorm$E, wt.meta.cellnorm$B,
tg.meta.cellnorm$M,tg.meta.cellnorm$E, tg.meta.cellnorm$B ,
outline = F,
las = 2,ylab = "cells per barcode per lineage", main = "Clone Size Distributions in Cell Counts per Condition per Lineage",
at =c(1,2,3, 5,6,7),col = c("red","red","red","blue","blue","blue"),
names = c("WT - M", "WT - E","WT - B",
"Tg - M", "Tg - E","Tg - B"))
wilcox.test(wt.meta.cellnorm$M,tg.meta.cellnorm$M)
##
## Wilcoxon rank sum test with continuity correction
##
## data: wt.meta.cellnorm$M and tg.meta.cellnorm$M
## W = 226516, p-value = 0.08257
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(wt.meta.cellnorm$B,tg.meta.cellnorm$B)
##
## Wilcoxon rank sum test with continuity correction
##
## data: wt.meta.cellnorm$B and tg.meta.cellnorm$B
## W = 237815, p-value = 0.8307
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(wt.meta.cellnorm$E,tg.meta.cellnorm$E)
##
## Wilcoxon rank sum test with continuity correction
##
## data: wt.meta.cellnorm$E and tg.meta.cellnorm$E
## W = 218966, p-value = 0.005592
## alternative hypothesis: true location shift is not equal to 0
Here we plot clone size distributions for each lineage for each mouse
boxplot(wt.m1$M,wt.m1$E, wt.m1$B,
wt.m2$M,wt.m2$E, wt.m2$B,
wt.m3$M,wt.m3$E, wt.m3$B,
wt.m4$M,wt.m4$E, wt.m4$B,
tg.m1$M,tg.m1$E, tg.m1$B ,
tg.m2$M,tg.m2$E, tg.m2$B ,
tg.m3$M,tg.m3$E, tg.m3$B ,
tg.m4$M,tg.m4$E, tg.m4$B ,
outline = F,las = 2,ylab = "relative barcode abundance",
main = "Clone Size Distributions Per Condition per Mouse",
col = c(rep("red",12),rep("blue",12)),
names = c("WT1 - M", "WT1 - E","WT1 - B",
"WT2 - M", "WT2 - E","WT2 - B",
"WT3 - M", "WT3 - E","WT3 - B",
"WT4 - M", "WT4 - E","WT4 - B",
"Tg1 - M", "Tg1 - E","Tg1 - B",
"Tg2 - M", "Tg2 - E","Tg2 - B",
"Tg3 - M", "Tg3 - E","Tg3 - B",
"Tg4 - M", "Tg4 - E","Tg4 - B"))
#Plot the median Myeloid clone size for each mouse, comparing WT and Tg
plotCloneSizes(wt.m1,wt.m2,wt.m3,wt.m4,
tg.m1,tg.m2,tg.m3,tg.m4,"M")
plotCloneSizes(wt.m1,wt.m2,wt.m3,wt.m4,
tg.m1,tg.m2,tg.m3,tg.m4,"B")
plotCloneSizes(wt.m1,wt.m2,wt.m3,wt.m4,
tg.m1,tg.m2,tg.m3,tg.m4,"E")
boxplot(m1.wt.cellnorm$M,m1.wt.cellnorm$E, m1.wt.cellnorm$B,
m2.wt.cellnorm$M,m2.wt.cellnorm$E, m2.wt.cellnorm$B,
m3.wt.cellnorm$M,m3.wt.cellnorm$E, m3.wt.cellnorm$B,
m4.wt.cellnorm$M,m4.wt.cellnorm$E, m4.wt.cellnorm$B,
m1.tg.cellnorm$M,m1.tg.cellnorm$E, m1.tg.cellnorm$B ,
m2.tg.cellnorm$M,m2.tg.cellnorm$E, m2.tg.cellnorm$B ,
m3.tg.cellnorm$M,m3.tg.cellnorm$E, m3.tg.cellnorm$B ,
m4.tg.cellnorm$M,m4.tg.cellnorm$E, m4.tg.cellnorm$B ,
outline = F,las = 2,ylab = "relative barcode abundance",
main = "Clone Size Distributions Per Condition per Mouse",
col = c(rep("red",12),rep("blue",12)),
names = c("WT1 - M", "WT1 - E","WT1 - B",
"WT2 - M", "WT2 - E","WT2 - B",
"WT3 - M", "WT3 - E","WT3 - B",
"WT4 - M", "WT4 - E","WT4 - B",
"Tg1 - M", "Tg1 - E","Tg1 - B",
"Tg2 - M", "Tg2 - E","Tg2 - B",
"Tg3 - M", "Tg3 - E","Tg3 - B",
"Tg4 - M", "Tg4 - E","Tg4 - B"))
plotCloneSizes(m1.wt.cellnorm,m2.wt.cellnorm,m3.wt.cellnorm,m4.wt.cellnorm,
m1.tg.cellnorm,m2.tg.cellnorm,m3.tg.cellnorm,m4.tg.cellnorm,"M")
plotCloneSizes(m1.wt.cellnorm,m2.wt.cellnorm,m3.wt.cellnorm,m4.wt.cellnorm,
m1.tg.cellnorm,m2.tg.cellnorm,m3.tg.cellnorm,m4.tg.cellnorm,"B")
plotCloneSizes(m1.wt.cellnorm,m2.wt.cellnorm,m3.wt.cellnorm,m4.wt.cellnorm,
m1.tg.cellnorm,m2.tg.cellnorm,m3.tg.cellnorm,m4.tg.cellnorm,"E")
Aside from clone size distributions we can also look at how many clones there are in each celltype. This gives an indication of how many MPPs contribute to each cell lineage
barplot(c(wt.meta %>% filter(M > 0) %>% nrow(),
tg.meta %>% filter(M > 0) %>% nrow(),
wt.meta %>% filter(E > 0) %>% nrow(),
tg.meta %>% filter(E > 0) %>% nrow(),
wt.meta %>% filter(B > 0) %>% nrow(),
tg.meta %>% filter(B > 0) %>% nrow()),
names = c("WT-M","Tg-M",
"WT-E","Tg-E",
"WT-B","Tg-B"),
col = c("red","blue","red","blue","red","blue"),
main = "clonal diversity")
Here we how the same as above but show mouse to mouse variability. We observe no significant differences between WT and Tg in this setting
#compute the total clonal diversity in each setting
wt.m <- c(table(wt.m1$M > 0)[[2]],
table(wt.m2$M > 0)[[2]],
table(wt.m3$M > 0)[[2]],
table(wt.m4$M > 0)[[2]])
tg.m <- c(table(tg.m1$M > 0)[[2]],
table(tg.m2$M > 0)[[2]],
table(tg.m3$M > 0)[[2]],
table(tg.m4$M > 0)[[2]])
boxplot(wt.m,tg.m, main = "Comparison of clone size distributions between WT and Tg in Myeloid",
ylab = "total number of clones per mouse",names = c("WT","Tg"), col = c("blue","red"))
wt.e <- c(table(wt.m1$E > 0)[[2]],
table(wt.m2$E > 0)[[2]],
table(wt.m3$E > 0)[[2]],
table(wt.m4$E > 0)[[2]])
tg.e <- c(table(tg.m1$E > 0)[[2]],
table(tg.m2$E > 0)[[2]],
table(tg.m3$E > 0)[[2]],
table(tg.m4$E > 0)[[2]])
boxplot(wt.e,tg.e, main = "Comparison of clone size distributions between WT and Tg in Erythroid",
ylab = "total number of clones per mouse",names = c("WT","Tg"), col = c("blue","red"))
wt.b <- c(table(wt.m1$B > 0)[[2]],
table(wt.m2$B > 0)[[2]],
table(wt.m3$B > 0)[[2]],
table(wt.m4$B > 0)[[2]])
tg.b <- c(table(tg.m1$B > 0)[[2]],
table(tg.m2$B > 0)[[2]],
table(tg.m3$B > 0)[[2]],
table(tg.m4$B > 0)[[2]])
boxplot(wt.b,tg.b, main = "Comparison of clone size distributions between WT and Tg in B",
ylab = "total number of clones per mouse",names = c("WT","Tg"), col = c("blue","red"))
Now we have analysed differences in clone size distributions and clonal diversity we can look at barcode sharing across lineages. First we will compare 2 lineages at a time using a simple scatter plot
qplot(log(wt.meta.cellnorm$M+ 1) ,log(wt.meta.cellnorm$E + 1),main = "WT M vs E with cell normalisation", xlim = c(0,11),ylim = c(0,11)) + geom_abline(intercept = 0,slope = 1, col = 'grey30') + geom_point(size=1)
qplot(log(tg.meta.cellnorm$M+ 1) ,log(tg.meta.cellnorm$E + 1),main = "Tg M vs E with cell normalisation" , xlim = c(0,11),ylim = c(0,11)) + geom_abline(intercept = 0,slope = 1, col = 'grey30') + geom_point(size=1)
qplot(log(wt.meta.cellnorm$M+ 1) ,log(wt.meta.cellnorm$B + 1),main = "WT M vs B with cell normalisation", xlim = c(0,12),ylim = c(0,12)) + geom_abline(intercept = 0,slope = 1, col = 'grey30') + geom_point(size=1)
qplot(log(tg.meta.cellnorm$M+ 1) ,log(tg.meta.cellnorm$B + 1),main = "Tg M vs B with cell normalisation" , xlim = c(0,12),ylim = c(0,12)) + geom_abline(intercept = 0,slope = 1, col = 'grey30') + geom_point(size=1)
qplot(log(wt.meta.cellnorm$B+ 1) ,log(wt.meta.cellnorm$E + 1),main = "WT B vs E with cell normalisation", xlim = c(0,12),ylim = c(0,12)) + geom_abline(intercept = 0,slope = 1, col = 'grey30') + geom_point(size=1)
qplot(log(tg.meta.cellnorm$B+ 1) ,log(tg.meta.cellnorm$E + 1),main = "Tg B vs E with cell normalisation" , xlim = c(0,12),ylim = c(0,12)) + geom_abline(intercept = 0,slope = 1, col = 'grey30') + geom_point(size=1)
We can also analyse barcode sharing by heatmap visualisation and unsupervised hierarchical clustering. We do not observe much striking differences between the WT and Tg
#heatmap with relative clone abundances
heatmap(log(as.matrix(wt.meta[,c("B","M","E")] + 1)) ,col=colorRampPalette(c("black","green","red"),bias = 2)(300),scale = "none",distfun = function(x) dist(x,method = 'euclidean'))
heatmap(log(as.matrix(tg.meta[,c("B","M","E")] + 1)) ,col=colorRampPalette(c("black","green","red"),bias = 2)(300),scale = "none",distfun = function(x) dist(x,method = 'euclidean'))
#heatmap with cell-count normalisation
heatmap(log(as.matrix(wt.meta.cellnorm[,c("B","M","E")] + 1)) ,col=colorRampPalette(c("black","green","red"),bias = 2)(300),scale = "none",distfun = function(x) dist(x,method = 'euclidean'))
heatmap(log(as.matrix(tg.meta.cellnorm[,c("B","M","E")] + 1)) ,col=colorRampPalette(c("black","green","red"),bias = 2)(300),scale = "none",distfun = function(x) dist(x,method = 'euclidean'))
Another way to look at biases across 3 lineages is to do a ternary plot. We dont observe any striking differences between WT and Tg using this visualisation.
#row-wise normalisations
tg.meta.norm <- performRowNormalisation(tg.meta)
wt.meta.norm <- performRowNormalisation(wt.meta)
tg.meta.cellnorm.norm <- performRowNormalisation(tg.meta.cellnorm)
wt.meta.cellnorm.norm <- performRowNormalisation(wt.meta.cellnorm)
triax.plot(wt.meta.norm, col.symbols = "red",show.grid = T,pch = 16)
triax.plot(tg.meta.norm, col.symbols = "blue",show.grid = T,pch = 16)
triax.plot(wt.meta.cellnorm.norm, col.symbols = "red",show.grid = F,pch = 16)
triax.plot(tg.meta.cellnorm.norm, col.symbols = "blue",show.grid = F,pch = 16)
threshold <- 0.1
barplot(multiOutcomeClassifier(wt.meta.norm, threshold = threshold)$table,ylim = c(0,300), main = "WT")
barplot(multiOutcomeClassifier(tg.meta.norm, threshold = threshold)$table,ylim = c(0,300), main = "Tg")
barplot(multiOutcomeClassifier(wt.meta.cellnorm.norm, threshold = threshold)$table,ylim = c(0,300), main = "WT cellnorm")
barplot(multiOutcomeClassifier(tg.meta.cellnorm.norm, threshold = threshold)$table,ylim = c(0,300), main = "Tg cellnorm")
m1.wt.cellnorm.norm <- performRowNormalisation(m1.wt.cellnorm)
barplot(multiOutcomeClassifier(m1.wt.cellnorm.norm, threshold = threshold)$table, main = "M1 WT" ,ylim = c(0,100))
m2.wt.cellnorm.norm <- performRowNormalisation(m2.wt.cellnorm)
barplot(multiOutcomeClassifier(m2.wt.cellnorm.norm, threshold = threshold)$table ,main = "M2 WT",ylim = c(0,100))
m3.wt.cellnorm.norm <- performRowNormalisation(m3.wt.cellnorm)
barplot(multiOutcomeClassifier(m3.wt.cellnorm.norm, threshold = threshold)$table,main = "M3 WT",ylim = c(0,100))
m4.wt.cellnorm.norm <- performRowNormalisation(m4.wt.cellnorm)
barplot(multiOutcomeClassifier(m4.wt.cellnorm.norm, threshold = threshold)$table,main = "M4 WT",ylim = c(0,100))
m1.tg.cellnorm.norm <- performRowNormalisation(m1.tg.cellnorm)
barplot(multiOutcomeClassifier(m1.tg.cellnorm.norm, threshold = threshold)$table ,main = "M1 Tg",ylim = c(0,100))
m2.tg.cellnorm.norm <- performRowNormalisation(m2.tg.cellnorm)
barplot(multiOutcomeClassifier(m2.tg.cellnorm.norm, threshold = threshold)$table ,main = "M2 Tg",ylim = c(0,100))
m3.tg.cellnorm.norm <- performRowNormalisation(m3.tg.cellnorm)
barplot(multiOutcomeClassifier(m3.tg.cellnorm.norm, threshold = threshold)$table,main = "M3 Tg",ylim = c(0,100))
m4.tg.cellnorm.norm <- performRowNormalisation(m4.tg.cellnorm)
barplot(multiOutcomeClassifier(m4.tg.cellnorm.norm, threshold = threshold)$table,main = "M4 Tg",ylim = c(0,100))
plotClassifierLineage("B",threshold)
## Warning in wilcox.test.default(wt, tg): cannot compute exact p-value with ties
plotClassifierLineage("BE",threshold)
## Warning in wilcox.test.default(wt, tg): cannot compute exact p-value with ties
plotClassifierLineage("BM",threshold)
## Warning in wilcox.test.default(wt, tg): cannot compute exact p-value with ties
plotClassifierLineage("BME",threshold)
## Warning in wilcox.test.default(wt, tg): cannot compute exact p-value with ties
plotClassifierLineage("E",threshold)
plotClassifierLineage("M",threshold)
## Warning in wilcox.test.default(wt, tg): cannot compute exact p-value with ties
plotClassifierLineage("ME",threshold)
wt.cumsum <- generateCumulativeProbability(wt.meta.cellnorm,"B")
tg.cumsum <- generateCumulativeProbability(tg.meta.cellnorm,"B")
plot(wt.cumsum$cell_counts,wt.cumsum$cumsum,col = "red",lwd = 1,
ylim = c(0,5e5))
points(tg.cumsum$cell_counts,tg.cumsum$cumsum,col = "blue",lwd = 1)
wt.cumsum <- generateCumulativeProbabilityNorm(wt.meta.cellnorm,"B")
tg.cumsum <- generateCumulativeProbabilityNorm(tg.meta.cellnorm,"B")
plot(1:nrow(wt.cumsum)/nrow(wt.cumsum),wt.cumsum$cumsum,col = "red",lwd = 1,xlab = "% of all clones", ylab = "cumulative cellular output")
points(1:nrow(tg.cumsum)/nrow(tg.cumsum),tg.cumsum$cumsum,col = "blue",lwd = 1)
m1.wt <- generateCumulativeProbabilityNorm(m1.wt.cellnorm,"B")
m2.wt <- generateCumulativeProbabilityNorm(m2.wt.cellnorm,"B")
m3.wt <- generateCumulativeProbabilityNorm(m3.wt.cellnorm,"B")
m4.wt <- generateCumulativeProbabilityNorm(m4.wt.cellnorm,"B")
m1.tg <- generateCumulativeProbabilityNorm(m1.tg.cellnorm,"B")
m2.tg <- generateCumulativeProbabilityNorm(m2.tg.cellnorm,"B")
m3.tg <- generateCumulativeProbabilityNorm(m3.tg.cellnorm,"B")
m4.tg <- generateCumulativeProbabilityNorm(m4.tg.cellnorm,"B")
plot(1:nrow(m1.wt)/nrow(m1.wt),m1.wt$cumsum,col = "red",lwd = 1,xlim = c(0,1),
xlab = "% of all clones", ylab = "cumulative number of cells")
points(1:nrow(m2.wt)/nrow(m2.wt),m2.wt$cumsum,col = "orange",lwd = 1)
points(1:nrow(m3.wt)/nrow(m3.wt),m3.wt$cumsum,col = "dark red",lwd = 1)
points(1:nrow(m4.wt)/nrow(m4.wt),m4.wt$cumsum,col = "yellow",lwd = 1)
points(1:nrow(m1.tg)/nrow(m1.tg),m1.tg$cumsum,col = "blue",lwd = 1)
points(1:nrow(m2.tg)/nrow(m2.tg),m2.tg$cumsum,col = "dark blue",lwd = 1)
points(1:nrow(m3.tg)/nrow(m3.tg),m3.tg$cumsum,col = "green",lwd = 1)
points(1:nrow(m4.tg)/nrow(m4.tg),m4.tg$cumsum,col = "dark green",lwd = 1)
abline(h = 0.05, col = 'red')
text(0.2,0.05,"threshold for high output clones",srt=0.1,pos=3, col = "red",cex = 0.7)
wt.highoutputclones <- c(
m1.wt %>% filter(cumsum > 0.05) %>% nrow(),
m2.wt %>% filter(cumsum > 0.05) %>% nrow(),
m3.wt %>% filter(cumsum > 0.05) %>% nrow(),
m4.wt %>% filter(cumsum > 0.05) %>% nrow())
tg.highoutputclones <- c(
m1.tg %>% filter(cumsum > 0.05) %>% nrow(),
m2.tg %>% filter(cumsum > 0.05) %>% nrow(),
m3.tg %>% filter(cumsum > 0.05) %>% nrow(),
m4.tg %>% filter(cumsum > 0.05) %>% nrow())
boxplot(wt.highoutputclones,
tg.highoutputclones ,outline = F,
main = "High output clones in B: clonal diversity",
ylab = "total number of clones per mouse",names = c("WT","Tg"), col = c("red","blue"))
#data is normally distributed
shapiro.test(c(wt.highoutputclones,
tg.highoutputclones))
##
## Shapiro-Wilk normality test
##
## data: c(wt.highoutputclones, tg.highoutputclones)
## W = 0.97025, p-value = 0.9
t.test(wt.highoutputclones,tg.highoutputclones)
##
## Welch Two Sample t-test
##
## data: wt.highoutputclones and tg.highoutputclones
## t = -2.4814, df = 5.8167, p-value = 0.04897
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -28.41003311 -0.08996689
## sample estimates:
## mean of x mean of y
## 20.25 34.50
#plot the clone size distributions for both conditions
wt.allmice <- unlist(c(m1.wt %>% filter(cumsum > 0.05) %>% select(cell_counts),
m2.wt %>% filter(cumsum > 0.05) %>% select(cell_counts),
m2.wt %>% filter(cumsum > 0.05) %>% select(cell_counts),
m2.wt %>% filter(cumsum > 0.05) %>% select(cell_counts)))
tg.allmice <- unlist(c(m1.tg %>% filter(cumsum > 0.05) %>% select(cell_counts),
m2.tg %>% filter(cumsum > 0.05) %>% select(cell_counts),
m2.tg %>% filter(cumsum > 0.05) %>% select(cell_counts),
m2.tg %>% filter(cumsum > 0.05) %>% select(cell_counts)))
boxplot(wt.allmice,
tg.allmice ,outline = F,
main = "High output clones in B: clone sizes",
ylab = "total number of clones per mouse",names = c("WT","Tg"), col = c("red","blue"))
shapiro.test(c(wt.allmice,
tg.allmice))
##
## Shapiro-Wilk normality test
##
## data: c(wt.allmice, tg.allmice)
## W = 0.56225, p-value < 2.2e-16
wilcox.test(wt.allmice,tg.allmice)
##
## Wilcoxon rank sum test with continuity correction
##
## data: wt.allmice and tg.allmice
## W = 8454, p-value = 4.031e-09
## alternative hypothesis: true location shift is not equal to 0
write.csv(wt.allmice,"/Users/jasoncosgrove/Desktop/wt_clonesize.csv")
write.csv(tg.allmice,"/Users/jasoncosgrove/Desktop/tg_clonesize.csv")
#now show the median clone size for each mouse
wt.highoutputclones <- unlist(c(
m1.wt %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m2.wt %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m3.wt %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m4.wt %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts))))
tg.highoutputclones <- unlist(c(
m1.tg %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m2.tg %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m3.tg %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m4.tg %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts))))
#data is normally distributed
boxplot(wt.highoutputclones,
tg.highoutputclones ,outline = F,
main = "High output clones in B: clone sizes",
ylab = "total number of clones per mouse",names = c("WT","Tg"), col = c("red","blue"))
shapiro.test(c(wt.highoutputclones,
tg.highoutputclones))
##
## Shapiro-Wilk normality test
##
## data: c(wt.highoutputclones, tg.highoutputclones)
## W = 0.94401, p-value = 0.6509
t.test(wt.highoutputclones,tg.highoutputclones)
##
## Welch Two Sample t-test
##
## data: wt.highoutputclones and tg.highoutputclones
## t = 2.6073, df = 5.0571, p-value = 0.04731
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 18.50125 2105.34364
## sample estimates:
## mean of x mean of y
## 1692.0576 630.1351
wt.clones <- c(m1.wt %>% filter(cumsum > 0.05) %>% rownames,
m2.wt %>% filter(cumsum > 0.05) %>% rownames,
m3.wt %>% filter(cumsum > 0.05) %>% rownames,
m4.wt %>% filter(cumsum > 0.05) %>% rownames)
tg.clones <- c(m1.tg %>% filter(cumsum > 0.05) %>% rownames,
m2.tg %>% filter(cumsum > 0.05) %>% rownames,
m3.tg %>% filter(cumsum > 0.05) %>% rownames,
m4.tg %>% filter(cumsum > 0.05) %>% rownames)
wt.subset <- wt.meta.cellnorm[wt.clones,]
tg.subset <- tg.meta.cellnorm[tg.clones,]
wt.subset.rownorm <- wt.meta.cellnorm.norm[wt.clones,]
tg.subset.rownorm <- tg.meta.cellnorm.norm[tg.clones,]
#heatmap with cell-count normalisation
heatmap(log(as.matrix(wt.subset[,c("B","M","E")] + 1)) ,col=colorRampPalette(c("black","green","red"),bias = 2)(300),scale = "none",distfun = function(x) dist(x,method = 'euclidean'))
heatmap(log(as.matrix(tg.subset[,c("B","M","E")] + 1)) ,col=colorRampPalette(c("black","green","red"),bias = 2)(300),scale = "none",distfun = function(x) dist(x,method = 'euclidean'))
boxplot(wt.subset.rownorm$B, tg.subset.rownorm$B,
wt.subset.rownorm$M, tg.subset.rownorm$M,
wt.subset.rownorm$E, tg.subset.rownorm$E)
write.csv(wt.subset.rownorm,"/Users/jasoncosgrove/Desktop/wt_rownorm.csv")
write.csv(tg.subset.rownorm,"/Users/jasoncosgrove/Desktop/tg_rownorm.csv")
wilcox.test(wt.subset.rownorm$B, tg.subset.rownorm$B)
##
## Wilcoxon rank sum test with continuity correction
##
## data: wt.subset.rownorm$B and tg.subset.rownorm$B
## W = 7056, p-value = 0.001197
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(wt.subset.rownorm$M, tg.subset.rownorm$M)
##
## Wilcoxon rank sum test with continuity correction
##
## data: wt.subset.rownorm$M and tg.subset.rownorm$M
## W = 4436, p-value = 0.0109
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(wt.subset.rownorm$E, tg.subset.rownorm$E)
##
## Wilcoxon rank sum test with continuity correction
##
## data: wt.subset.rownorm$E and tg.subset.rownorm$E
## W = 3537, p-value = 5.849e-06
## alternative hypothesis: true location shift is not equal to 0
#heatmap with cell-count normalisation
heatmap(log(as.matrix(wt.subset[,c("B","M","E")] + 1)) ,col=colorRampPalette(c("black","green","red"),bias = 2)(300),scale = "none",distfun = function(x) dist(x,method = 'euclidean'))
heatmap(log(as.matrix(tg.subset[,c("B","M","E")] + 1)) ,col=colorRampPalette(c("black","green","red"),bias = 2)(300),scale = "none",distfun = function(x) dist(x,method = 'euclidean'))
combined.mat <- rbind(wt.subset,tg.subset)
new.mat <- matrix(0,nrow = nrow(combined.mat),ncol = 6)
rownames(new.mat) <- row.names(combined.mat)
colnames(new.mat) <- c("WT-B", "WT-M","WT-E",
"Tg-B", "Tg-M","Tg-E")
new.mat <- data.frame(new.mat)
new.mat[1:nrow(wt.subset),1:3] <- wt.subset[,1:3]
new.mat[82:219,4:6] <- tg.subset[,1:3]
heatmap(log(as.matrix(new.mat + 1)) ,col=colorRampPalette(c("black","green","red"),bias = 2)(300),scale = "none",distfun = function(x) dist(x,method = 'euclidean'))
#lets run the same analysis in the other lineages to see if it’s really a B-cell specific phenomenon
wt.cumsum <- generateCumulativeProbability(wt.meta.cellnorm,"M")
tg.cumsum <- generateCumulativeProbability(tg.meta.cellnorm,"M")
plot(wt.cumsum$cell_counts,wt.cumsum$cumsum,col = "red",lwd = 1,
ylim = c(0,5e5))
points(tg.cumsum$cell_counts,tg.cumsum$cumsum,col = "blue",lwd = 1)
wt.cumsum <- generateCumulativeProbabilityNorm(wt.meta.cellnorm,"M")
tg.cumsum <- generateCumulativeProbabilityNorm(tg.meta.cellnorm,"M")
plot(1:nrow(wt.cumsum)/nrow(wt.cumsum),wt.cumsum$cumsum,col = "red",lwd = 1,xlab = "% of all clones", ylab = "cumulative cellular output")
points(1:nrow(tg.cumsum)/nrow(tg.cumsum),tg.cumsum$cumsum,col = "blue",lwd = 1)
m1.wt <- generateCumulativeProbabilityNorm(m1.wt.cellnorm,"M")
m2.wt <- generateCumulativeProbabilityNorm(m2.wt.cellnorm,"M")
m3.wt <- generateCumulativeProbabilityNorm(m3.wt.cellnorm,"M")
m4.wt <- generateCumulativeProbabilityNorm(m4.wt.cellnorm,"M")
m1.tg <- generateCumulativeProbabilityNorm(m1.tg.cellnorm,"M")
m2.tg <- generateCumulativeProbabilityNorm(m2.tg.cellnorm,"M")
m3.tg <- generateCumulativeProbabilityNorm(m3.tg.cellnorm,"M")
m4.tg <- generateCumulativeProbabilityNorm(m4.tg.cellnorm,"M")
plot(1:nrow(m1.wt)/nrow(m1.wt),m1.wt$cumsum,col = "red",lwd = 1,xlim = c(0,1),
xlab = "% of all clones", ylab = "cumulative number of cells")
points(1:nrow(m2.wt)/nrow(m2.wt),m2.wt$cumsum,col = "orange",lwd = 1)
points(1:nrow(m3.wt)/nrow(m3.wt),m3.wt$cumsum,col = "dark red",lwd = 1)
points(1:nrow(m4.wt)/nrow(m4.wt),m4.wt$cumsum,col = "yellow",lwd = 1)
points(1:nrow(m1.tg)/nrow(m1.tg),m1.tg$cumsum,col = "blue",lwd = 1)
points(1:nrow(m2.tg)/nrow(m2.tg),m2.tg$cumsum,col = "dark blue",lwd = 1)
points(1:nrow(m3.tg)/nrow(m3.tg),m3.tg$cumsum,col = "green",lwd = 1)
points(1:nrow(m4.tg)/nrow(m4.tg),m4.tg$cumsum,col = "dark green",lwd = 1)
abline(h = 0.05, col = 'red')
text(0.2,0.05,"threshold for high output clones",srt=0.1,pos=3, col = "red",cex = 0.7)
wt.highoutputclones <- c(
m1.wt %>% filter(cumsum > 0.05) %>% nrow(),
m2.wt %>% filter(cumsum > 0.05) %>% nrow(),
m3.wt %>% filter(cumsum > 0.05) %>% nrow(),
m4.wt %>% filter(cumsum > 0.05) %>% nrow())
tg.highoutputclones <- c(
m1.tg %>% filter(cumsum > 0.05) %>% nrow(),
m2.tg %>% filter(cumsum > 0.05) %>% nrow(),
m3.tg %>% filter(cumsum > 0.05) %>% nrow(),
m4.tg %>% filter(cumsum > 0.05) %>% nrow())
boxplot(wt.highoutputclones,
tg.highoutputclones ,outline = F,
main = "High output clones in M: clonal diversity",
ylab = "total number of clones per mouse",names = c("WT","Tg"), col = c("red","blue"))
#data is normally distributed
shapiro.test(c(wt.highoutputclones,
tg.highoutputclones))
##
## Shapiro-Wilk normality test
##
## data: c(wt.highoutputclones, tg.highoutputclones)
## W = 0.92559, p-value = 0.4768
t.test(wt.highoutputclones,tg.highoutputclones)
##
## Welch Two Sample t-test
##
## data: wt.highoutputclones and tg.highoutputclones
## t = 0.28664, df = 5.441, p-value = 0.785
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -34.89138 43.89138
## sample estimates:
## mean of x mean of y
## 57.25 52.75
wt.highoutputclones <- unlist(c(
m1.wt %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m2.wt %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m3.wt %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m4.wt %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts))))
tg.highoutputclones <- unlist(c(
m1.tg %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m2.tg %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m3.tg %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m4.tg %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts))))
#data is normally distributed
boxplot(wt.highoutputclones,
tg.highoutputclones ,outline = F,
main = "High output clones in M: clone sizes",
ylab = "total number of clones per mouse",names = c("WT","Tg"), col = c("red","blue"))
shapiro.test(c(wt.highoutputclones,
tg.highoutputclones))
##
## Shapiro-Wilk normality test
##
## data: c(wt.highoutputclones, tg.highoutputclones)
## W = 0.85835, p-value = 0.1156
t.test(wt.highoutputclones,tg.highoutputclones)
##
## Welch Two Sample t-test
##
## data: wt.highoutputclones and tg.highoutputclones
## t = 0.17867, df = 5.5682, p-value = 0.8645
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1495.472 1726.307
## sample estimates:
## mean of x mean of y
## 1329.318 1213.901
#lets run the same analysis in the other lineages to see if it’s really a B-cell specific phenomenon
wt.cumsum <- generateCumulativeProbability(wt.meta.cellnorm,"E")
tg.cumsum <- generateCumulativeProbability(tg.meta.cellnorm,"E")
plot(wt.cumsum$cell_counts,wt.cumsum$cumsum,col = "red",lwd = 1,
ylim = c(0,5e5))
points(tg.cumsum$cell_counts,tg.cumsum$cumsum,col = "blue",lwd = 1)
wt.cumsum <- generateCumulativeProbabilityNorm(wt.meta.cellnorm,"E")
tg.cumsum <- generateCumulativeProbabilityNorm(tg.meta.cellnorm,"E")
plot(1:nrow(wt.cumsum)/nrow(wt.cumsum),wt.cumsum$cumsum,col = "red",lwd = 1,xlab = "% of all clones", ylab = "cumulative cellular output")
points(1:nrow(tg.cumsum)/nrow(tg.cumsum),tg.cumsum$cumsum,col = "blue",lwd = 1)
m1.wt <- generateCumulativeProbabilityNorm(m1.wt.cellnorm,"E")
m2.wt <- generateCumulativeProbabilityNorm(m2.wt.cellnorm,"E")
m3.wt <- generateCumulativeProbabilityNorm(m3.wt.cellnorm,"E")
m4.wt <- generateCumulativeProbabilityNorm(m4.wt.cellnorm,"E")
m1.tg <- generateCumulativeProbabilityNorm(m1.tg.cellnorm,"E")
m2.tg <- generateCumulativeProbabilityNorm(m2.tg.cellnorm,"E")
m3.tg <- generateCumulativeProbabilityNorm(m3.tg.cellnorm,"E")
m4.tg <- generateCumulativeProbabilityNorm(m4.tg.cellnorm,"E")
plot(1:nrow(m1.wt)/nrow(m1.wt),m1.wt$cumsum,col = "red",lwd = 1,xlim = c(0,1),
xlab = "% of all clones", ylab = "cumulative number of cells")
points(1:nrow(m2.wt)/nrow(m2.wt),m2.wt$cumsum,col = "orange",lwd = 1)
points(1:nrow(m3.wt)/nrow(m3.wt),m3.wt$cumsum,col = "dark red",lwd = 1)
points(1:nrow(m4.wt)/nrow(m4.wt),m4.wt$cumsum,col = "yellow",lwd = 1)
points(1:nrow(m1.tg)/nrow(m1.tg),m1.tg$cumsum,col = "blue",lwd = 1)
points(1:nrow(m2.tg)/nrow(m2.tg),m2.tg$cumsum,col = "dark blue",lwd = 1)
points(1:nrow(m3.tg)/nrow(m3.tg),m3.tg$cumsum,col = "green",lwd = 1)
points(1:nrow(m4.tg)/nrow(m4.tg),m4.tg$cumsum,col = "dark green",lwd = 1)
abline(h = 0.05, col = 'red')
text(0.2,0.05,"threshold for high output clones",srt=0.1,pos=3, col = "red",cex = 0.7)
wt.highoutputclones <- c(
m1.wt %>% filter(cumsum > 0.05) %>% nrow(),
m2.wt %>% filter(cumsum > 0.05) %>% nrow(),
m3.wt %>% filter(cumsum > 0.05) %>% nrow(),
m4.wt %>% filter(cumsum > 0.05) %>% nrow())
tg.highoutputclones <- c(
m1.tg %>% filter(cumsum > 0.05) %>% nrow(),
m2.tg %>% filter(cumsum > 0.05) %>% nrow(),
m3.tg %>% filter(cumsum > 0.05) %>% nrow(),
m4.tg %>% filter(cumsum > 0.05) %>% nrow())
boxplot(wt.highoutputclones,
tg.highoutputclones ,outline = F,
main = "High output clones in E: clonal diversity",
ylab = "total number of clones per mouse",names = c("WT","Tg"), col = c("red","blue"))
#data is normally distributed
shapiro.test(c(wt.highoutputclones,
tg.highoutputclones))
##
## Shapiro-Wilk normality test
##
## data: c(wt.highoutputclones, tg.highoutputclones)
## W = 0.9456, p-value = 0.6669
t.test(wt.highoutputclones,tg.highoutputclones)
##
## Welch Two Sample t-test
##
## data: wt.highoutputclones and tg.highoutputclones
## t = -0.33491, df = 4.6363, p-value = 0.7523
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -46.51546 36.01546
## sample estimates:
## mean of x mean of y
## 60.00 65.25
wt.highoutputclones <- unlist(c(
m1.wt %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m2.wt %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m3.wt %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m4.wt %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts))))
tg.highoutputclones <- unlist(c(
m1.tg %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m2.tg %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m3.tg %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts)),
m4.tg %>% filter(cumsum > 0.05) %>% summarise(median = median(cell_counts))))
#data is normally distributed
boxplot(wt.highoutputclones,
tg.highoutputclones ,outline = F,
main = "High output clones in E: clone sizes",
ylab = "total number of clones per mouse",names = c("WT","Tg"), col = c("red","blue"))
shapiro.test(c(wt.highoutputclones,
tg.highoutputclones))
##
## Shapiro-Wilk normality test
##
## data: c(wt.highoutputclones, tg.highoutputclones)
## W = 0.77251, p-value = 0.01444
wilcox.test(wt.highoutputclones,tg.highoutputclones)
##
## Wilcoxon rank sum exact test
##
## data: wt.highoutputclones and tg.highoutputclones
## W = 8, p-value = 1
## alternative hypothesis: true location shift is not equal to 0